#Importing the relevant modules
import requests
import io
from bs4 import BeautifulSoup
import pandas as pd
import numpy as np
import matplotlib as mpl
from nltk.corpus import stopwords
from nltk.stem.snowball import SnowballStemmer
# Shared English Snowball stemmer, used by the word-cloud section at the bottom.
stemmer = SnowballStemmer("english")
from wordcloud import (WordCloud, get_single_color_func)
import seaborn as sns
import matplotlib.pyplot as plt
import squarify
from urllib.request import urlopen
import json
# Download the US-counties GeoJSON once at import time; `counties` is passed to
# plotly's choropleth further down in the file.
_counties_url = 'https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json'
with urlopen(_counties_url) as _resp:
    counties = json.load(_resp)
import plotly.express as px
import os
import sys
import mpl_chord_diagram as mpl_cd
from mpl_chord_diagram import chord_diagram
from IPython.display import display, Image
# Title: Loans vis-a-vis euro area insurance corporations & pension funds
#        reported by MFI in the euro area (stock)
# Key: BSI.M.U2.Y.U.A20.A.1.U2.2220.Z01.E
# Source: https://www.pythonsherpa.com/static/files/html/ECB%20-%20Open%20Data.html (customized)
# --- Chart 1 data: fetch a single ECB SDW series as CSV ----------------------
# Building blocks for the URL
entrypoint = 'https://sdw-wsrest.ecb.europa.eu/service/'
resource = 'data'   # SDW REST resource for data queries
flowRef = 'BSI'     # dataflow: Balance Sheet Items
key = 'M.U2.Y.U.A20.A.1.U2.2220.Z01.E'
parameters = {
    'startPeriod': '2018-01-31',
    'endPeriod': '2022-06-30'
}
# Construct the URL, e.g. https://sdw-wsrest.ecb.europa.eu/service/data/BSI/<key>
request_url = entrypoint + resource + '/' + flowRef + '/' + key
# Single HTTP request asking directly for CSV.  The original issued a first
# request whose response was immediately discarded; that round trip is dropped.
response = requests.get(request_url, params=parameters, headers={'Accept': 'text/csv'})
# Read the CSV payload into a pandas DataFrame
df = pd.read_csv(io.StringIO(response.text))
df.head(3)
# Reduce the raw SDW export to the period/value pair, parse the period into
# real timestamps and use it as the index.
df = (
    df.filter(['TIME_PERIOD', 'OBS_VALUE'], axis=1)
      .assign(TIME_PERIOD=lambda d: pd.to_datetime(d['TIME_PERIOD']))
      .set_index('TIME_PERIOD')
)
df.tail()
# --- Shared plotting configuration used by every chart below -----------------
height = 6                # figure height in inches
wide = 14                 # figure width in inches
lineplot_color = 'black'  # default line colour
titel_fontsize = 18       # chart title font size (name kept as-is: used by all charts)
title_distance = 10       # padding between title and axes (matplotlib `pad`)
labels_fontsize = 16      # axis-label and tick font size
legend_fontsize = 13      # legend font size
save_dpi = 300            # resolution for the saved PNG files
# --- Chart 1: line plot of the single series ---------------------------------
# NOTE(review): the 'seaborn-white'/'seaborn-paper' style names were renamed to
# 'seaborn-v0_8-*' in matplotlib 3.6 — confirm against the installed version.
plt.style.use(['seaborn-white', 'seaborn-paper'])
plt.rc("font", family="Cambria")
plt.figure(figsize=(wide, height))
plt.plot(df, linewidth=2)
plt.ylabel(ylabel="Millions of Euro", size=labels_fontsize)
plt.xticks(fontsize=labels_fontsize)
plt.yticks(fontsize=labels_fontsize)
plt.title("Loans vis-a-vis euro area insurance corporations & pension funds \n reported by MFI in the euro area (stock)",
          fontweight="bold", size=titel_fontsize, pad=title_distance)
# The original passed the builtin `all` (an always-truthy object) by accident;
# grid(True) states the intent explicitly.
plt.grid(True)
plt.savefig('Chart_1.png', dpi=save_dpi, bbox_inches='tight')
def get_colors2():
    """Return the custom 5-colour palette as a (5, 3) RGB array in [0, 1]."""
    palette = [
        (0.25, 0.25, 0.25),     # very dark gray
        (0.667, 0.224, 0.224),  # red A13646
        (0.667, 0.424, 0.224),  # brown AA6439
        (0.131, 0.400, 0.400),  # blue 256E5D
        (0.176, 0.533, 0.176),  # green 4B9130
    ]
    return np.array(palette)
# Preview the custom palette as a row of colour swatches.
sns.palplot(get_colors2())
# --- Chart 2 data: NFC loans by maturity bucket ------------------------------
# Building blocks for the URL
entrypoint = 'https://sdw-wsrest.ecb.europa.eu/service/'
resource = 'data'
flowRef = 'BSI'
# Loans vis-a-vis euro area NFC reported by MFI in the euro area (stock)
keys = ['M.U2.Y.U.A20.F.1.U2.2240.Z01.E',  # Up to 1 year
        'M.U2.Y.U.A20.I.1.U2.2240.Z01.E',  # Over 1 and up to 5 years
        'M.U2.Y.U.A20.J.1.U2.2240.Z01.E'   # Over 5 years
        ]
parameters = {
    'startPeriod': '2018-01-31',
    'endPeriod': '2022-06-30'
}
# One frame per maturity bucket, keyed by the maturity letter (F / I / J) that
# appears in the series key.  A plain dict replaces the original exec()-built
# dynamic variable names, and zip() replaces the manual index counter.
maturity_codes = ["F", "I", "J"]
frames = {}
for maturity, key in zip(maturity_codes, keys):
    print(f'{key} stored in df_BSI_{maturity}')
    # Construct the URL, e.g. https://sdw-wsrest.ecb.europa.eu/service/data/BSI/<key>
    request_url = entrypoint + resource + '/' + flowRef + '/' + key
    # Single HTTP request, asking directly for CSV (the discarded first request
    # of the original is dropped).
    response = requests.get(request_url, params=parameters, headers={'Accept': 'text/csv'})
    df = pd.read_csv(io.StringIO(response.text))
    df = df.filter(['TIME_PERIOD', 'OBS_VALUE'], axis=1)
    df['TIME_PERIOD'] = pd.to_datetime(df['TIME_PERIOD'])
    df = df.set_index('TIME_PERIOD')
    frames[maturity] = df.copy()
# Give each bucket a readable column name, then align on the monthly index.
df_BSI_F = frames["F"].rename(columns={"OBS_VALUE": "Up to 1 year (F)"})
df_BSI_I = frames["I"].rename(columns={"OBS_VALUE": "Over 1 and up to 5 years (I)"})
df_BSI_J = frames["J"].rename(columns={"OBS_VALUE": "Over 5 years (J)"})
df_BSI_merged = pd.merge(df_BSI_F, df_BSI_I, left_index=True, right_index=True)
df_BSI_merged = pd.merge(df_BSI_merged, df_BSI_J, left_index=True, right_index=True)
df_BSI_merged.head(3)
# --- Chart 2: line plot of the three maturity buckets ------------------------
plt.style.use(['seaborn-white'])
plt.rc("font", family="Cambria")
fig, ax = plt.subplots(figsize=(wide, height))
ax.plot(df_BSI_merged, linewidth=2)
# Thousands separators on the y axis.
ax.get_yaxis().set_major_formatter(
    mpl.ticker.FuncFormatter(lambda x, p: format(int(x), ',')))
ax.legend(list(df_BSI_merged.columns), fontsize=legend_fontsize)
ax.set_ylabel("Millions of Euro", fontsize=labels_fontsize)
ax.tick_params(labelsize=labels_fontsize)
ax.set_title("Loans vis-a-vis euro area NFC reported by MFI in the euro area (stock)", fontweight="bold", size=titel_fontsize, pad=title_distance)
# The original passed the builtin `all` by accident; grid(True) is the intent.
ax.grid(True)
fig.savefig('Chart_2.png', dpi=save_dpi, bbox_inches='tight')
# --- Chart 3: box plots of the three maturity buckets ------------------------
plt.style.use(['seaborn-white'])
plt.rc("font", family="Cambria")
fig, ax = plt.subplots(figsize=(wide, height))
ax.get_yaxis().set_major_formatter(mpl.ticker.FuncFormatter(lambda x, p: format(int(x), ',')))
# NOTE(review): boxplot receives the transposed frame; verify it yields one box
# per maturity column, as the three tick labels below assume.
ax.boxplot(df_BSI_merged.reset_index().drop("TIME_PERIOD", axis=1).T)
ax.set_ylabel("Millions of Euro", fontsize=labels_fontsize)
ax.tick_params(labelsize=labels_fontsize)
ax.set_xticklabels(list(df_BSI_merged.columns))
ax.set_title("Loans vis-a-vis euro area NFC reported by MFI in the euro area (stock) \n 2018-01 to 2022-05", fontweight="bold", size=titel_fontsize, pad=title_distance)
# grid(True) replaces the accidental grid(all) of the original.
ax.grid(True)
fig.savefig('Chart_3.png', dpi=save_dpi, bbox_inches='tight')
# --- Chart 4: grouped bars of yearly averages per maturity bucket ------------
plt.style.use(['seaborn-white'])
plt.rc("font", family="Cambria")
fig, ax = plt.subplots(figsize=(wide, height))
# Hoist the yearly resample: the original recomputed it for every bar call
# (six times in total).
yearly = df_BSI_merged.resample('Y').mean()
years = yearly.index.year
ax.bar(years - 0.2, yearly[yearly.columns[0]], width=0.2, color='#A13646')
ax.bar(years, yearly[yearly.columns[1]], width=0.2, color='#AA6439')
ax.bar(years + 0.2, yearly[yearly.columns[2]], width=0.2, color='#256E5D')
# Thousands separators on the y axis.
ax.get_yaxis().set_major_formatter(
    mpl.ticker.FuncFormatter(lambda x, p: format(int(x), ',')))
ax.legend(list(yearly.columns), fontsize=legend_fontsize)
ax.set_ylabel("Millions of Euro", fontsize=labels_fontsize)
ax.tick_params(labelsize=labels_fontsize)
ax.set_title("Loans vis-a-vis euro area NFC reported by MFI in the euro area (stock)", fontweight="bold", size=titel_fontsize, pad=title_distance)
# grid(True) replaces the accidental grid(all) of the original.
ax.grid(True)
fig.savefig('Chart_4.png', dpi=save_dpi, bbox_inches='tight')
# --- Chart 5: stacked area chart of the three maturity buckets ---------------
plt.style.use(['seaborn-white'])
plt.rc("font", family="Cambria")
fig, ax = plt.subplots(figsize=(wide, height))
ax.get_yaxis().set_major_formatter(mpl.ticker.FuncFormatter(lambda x, p: format(int(x), ',')))
ax.stackplot(df_BSI_merged.index,
             df_BSI_merged[df_BSI_merged.columns[0]],
             df_BSI_merged[df_BSI_merged.columns[1]],
             df_BSI_merged[df_BSI_merged.columns[2]],
             labels=list(df_BSI_merged.columns), colors=get_colors2(), alpha=0.7)
# Legend placed outside the axes on the right.
ax.legend(list(df_BSI_merged.columns), fontsize=legend_fontsize, bbox_to_anchor=(1.275, 1.00))
ax.set_ylabel("Millions of Euro", fontsize=labels_fontsize)
ax.tick_params(labelsize=labels_fontsize)
ax.set_title("Loans vis-a-vis euro area NFC reported by MFI in the euro area (stock) \n 2018-01 to 2022-05", fontweight="bold", size=titel_fontsize, pad=title_distance)
# grid(True) replaces the accidental grid(all) of the original.
ax.grid(True)
fig.savefig('Chart_5.png', dpi=save_dpi, bbox_inches='tight')
# --- Chart 6: treemap of the mean loan stock per maturity bucket -------------
plt.style.use(['seaborn-white'])
plt.rc("font", family="Cambria")
fig, ax = plt.subplots(figsize=(wide, height))
# squarify draws on the current axes; tile sizes are the column means.
squarify.plot(label=list(df_BSI_merged.columns), sizes=df_BSI_merged.mean(), color=get_colors2(), alpha=.4, text_kwargs={'fontsize': 15})
ax.set_title("Loans vis-a-vis euro area NFC reported by MFI in the euro area (stock) \n 2018-01 to 2022-05 (mean)", fontweight="bold", size=titel_fontsize, pad=title_distance)
# Hide tick labels: a treemap has no meaningful axes.
ax.set_yticklabels([])
ax.set_xticklabels([])
fig.savefig('Chart_6.png', dpi=save_dpi, bbox_inches='tight')
# --- Map data: loans vis-a-vis MFIs per counterpart country, from Germany ----
entrypoint = 'https://sdw-wsrest.ecb.europa.eu/service/'
resource = 'data'
flowRef = 'BSI'
# Loans vis-a-vis MFI reported by MFI excluding ESCB in Germany (stock)
key_p1 = 'Q.DE.N.A.A20.A.1.'   # series key up to the counterpart-country slot
key_p2 = '.1000.Z01.E'         # series key after the counterpart-country slot
country_list = ['AT','BE','BG','CY','CZ','DK','EE','ES','FI','FR','GB','GR','HR','HU','IE','IT','LT',
                'LU','LV','MT','NL','PL','PT','RO','SE','SI','SK']
parameters = {
    'startPeriod': '2021-12-31',
    'endPeriod': '2022-03-31'
}
# Collect one frame per country and concatenate once at the end:
# DataFrame.append was removed in pandas 2.0 (and was quadratic anyway).
country_frames = []
for country in country_list:
    request_url = entrypoint + resource + '/' + flowRef + '/' + key_p1 + country + key_p2
    # Single HTTP request, asking directly for CSV (the discarded first request
    # of the original is dropped).
    response = requests.get(request_url, params=parameters, headers={'Accept': 'text/csv'})
    # Skip empty bodies (no data for this series) — mirrors the guard the
    # chord-diagram loop below already uses.
    if response.text != '':
        df_temp = pd.read_csv(io.StringIO(response.text))
        country_frames.append(df_temp.filter(['TIME_PERIOD', 'OBS_VALUE', 'COUNT_AREA'], axis=1))
# ignore_index=True reproduces the original reset_index(drop=True).
df_count_area = pd.concat(country_frames, ignore_index=True) if country_frames else pd.DataFrame()
# ISO 3166-1 alpha-2 country code -> full country name; used to relabel the
# COUNT_AREA codes before plotting the choropleth by country name.
_codes_and_names = [
    ('AT', 'Austria'), ('BE', 'Belgium'), ('BG', 'Bulgaria'), ('CY', 'Cyprus'),
    ('CZ', 'Czechia'), ('DK', 'Denmark'), ('EE', 'Estonia'), ('ES', 'Spain'),
    ('FI', 'Finland'), ('FR', 'France'),
    ('GB', 'United Kingdom of Great Britain and Northern Ireland'),
    ('GR', 'Greece'), ('HR', 'Croatia'), ('HU', 'Hungary'), ('IE', 'Ireland'),
    ('IT', 'Italy'), ('LT', 'Lithuania'), ('LU', 'Luxembourg'), ('LV', 'Latvia'),
    ('MT', 'Malta'), ('NL', 'Netherlands'), ('PL', 'Poland'), ('PT', 'Portugal'),
    ('RO', 'Romania'), ('SE', 'Sweden'), ('SI', 'Slovenia'), ('SK', 'Slovakia'),
]
country_dict = dict(_codes_and_names)
# Map the two-letter COUNT_AREA codes onto full country names, which is what
# locationmode="country names" below matches against.
df_count_area['country_name'] = df_count_area['COUNT_AREA'].replace(country_dict)
# Choropleth of the loan stock per counterpart country.
# NOTE(review): `counties` is the US-counties FIPS GeoJSON loaded at the top of
# the file; with locationmode="country names" plotly resolves locations by
# name, so the geojson argument looks unused here — confirm it can be dropped.
fig = px.choropleth(df_count_area, geojson=counties, locations='country_name', color='OBS_VALUE',
                    color_continuous_scale="Viridis",
                    range_color=(0, df_count_area['OBS_VALUE'].max()),
                    scope="europe",
                    locationmode = "country names",
                    labels={'OBS_VALUE':'Millions of Euro'},
                    title="Loans vis-a-vis MFI reported by MFI excluding ESCB in Germany (stock) 2022Q1"
                    )
# Zoom the map to the plotted locations and fix the figure size/margins.
fig.update_geos(fitbounds="locations")
fig.update_layout(height=700, width=800, margin={"r":0,"t":80,"l":0,"b":0})
#plt.savefig(f'Chart_map.png', dpi=save_dpi, bbox_inches='tight')
fig.show()
# Pre-rendered animation of the map over time (generated elsewhere).
display(Image(filename='Animation.gif'))
# --- Chord-diagram data: country-to-country MFI loans (2021-Q4) --------------
entrypoint = 'https://sdw-wsrest.ecb.europa.eu/service/'
resource = 'data'
flowRef = 'BSI'
# Loans vis-a-vis MFI reported by MFI excluding ESCB - country to country (selected countries) (stock) - 2021-Q4
key_p1 = '.N.A.A20.A.1.'   # key between reporter and counterpart country slots
key_p2 = '.1000.Z01.E'     # key after the counterpart-country slot
country_list = ['AT','BE','BG','CY','CZ','DK','DE','EE','ES','FI','FR','GB','GR','HR','HU','IE','IT','LT',
                'LU','LV','MT','NL','PL','PT','RO','SE','SI','SK']
parameters = {
    'startPeriod': '2021-09-30',
    'endPeriod': '2021-12-31'
}
# Collect one frame per (reporter, counterpart) pair and concatenate once:
# DataFrame.append was removed in pandas 2.0 (and was quadratic anyway).
pair_frames = []
for country_ref in country_list:
    for country_count in country_list:
        if country_ref == country_count:
            continue  # no country-vs-itself series
        request_url = (entrypoint + resource + '/' + flowRef + '/Q.'
                       + country_ref + key_p1 + country_count + key_p2)
        # Single HTTP request, asking directly for CSV (the discarded first
        # request of the original is dropped).
        response = requests.get(request_url, params=parameters, headers={'Accept': 'text/csv'})
        if response.text != '':  # empty body = no data for this pair
            df_temp = pd.read_csv(io.StringIO(response.text))
            pair_frames.append(df_temp.filter(['OBS_VALUE', 'REF_AREA', 'COUNT_AREA'], axis=1))
df_chord = pd.concat(pair_frames, ignore_index=True) if pair_frames else pd.DataFrame()
df_chord = df_chord.replace(np.nan, 0)
df_chord.head(3)
# --- Chord diagram: pivot pairs into a square matrix and draw ----------------
df_chord = df_chord.sort_values(['REF_AREA', 'COUNT_AREA'])
# 'sum' instead of np.sum: passing the numpy callable as aggfunc is deprecated
# in recent pandas.
df_pivot_chord = pd.pivot_table(df_chord, values='OBS_VALUE', index='REF_AREA',
                                columns='COUNT_AREA', aggfunc='sum', fill_value=0)
# Keep only counterpart columns that also appear as reporters -> square matrix.
df_pivot_chord = df_pivot_chord[list(df_pivot_chord.index)]
df_pivot_chord = df_pivot_chord.reset_index()
df_pivot_chord = df_pivot_chord.round(0)
col_names = list(df_pivot_chord.columns)
col_names.remove('REF_AREA')
chord_matrix = df_pivot_chord[col_names].to_numpy()
# min_chord_width hides the many tiny flows that would clutter the figure.
chord_diagram(chord_matrix, col_names, gap=0.03, use_gradient=True, sort="size", directed=False,
              cmap=None, chord_colors=None, rotate_names=False, fontcolor="grey", min_chord_width=1000, show=False, fontsize=7)
plt.savefig('Chart_7.png', dpi=save_dpi, bbox_inches='tight')
plt.close()
display(Image(filename='Chart_7.png'))
# The following short script retrieves the text of the job advertisement
# (Research Analyst - Statistics (ESCB/IO)), processes it, and creates a word
# cloud (size = number of stemmed terms).
# Source: https://matix.io/extract-text-from-webpage-using-beautifulsoup-and-python/ (customized)
# --- Word-cloud input: retrieve and clean the job-advertisement text ---------
url = 'https://talent.ecb.europa.eu/careers/JobDetail?jobId=5856'
res = requests.get(url)
html_page = res.content
soup = BeautifulSoup(html_page, 'html.parser')
# string=True is the modern spelling of the deprecated find_all(text=True).
text = soup.find_all(string=True)
# Parent tags whose text content is boilerplate rather than ad copy.
blacklist = ['[document]','a','article','aside','body','footer','h4','head','header','html',
             'label','link','main','meta','p','script','section','ul']
# Join the visible chunks (same trailing-space format as the original loop).
output = ''.join('{} '.format(t) for t in text if t.parent.name not in blacklist)
# Removing stopwords.  Build the set once: the original re-read the NLTK
# stopword corpus for every single token.
stopword_set = set(stopwords.words('english'))
output = ' '.join([word for word in output.split() if word not in stopword_set])
print(f'{output[:1000]}...')
# Remove punctuation, digits and special characters in one C-level pass
# (str.translate) instead of the original chained per-character replace loop.
special = "1234567890!#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~©€"
output = output.translate(str.maketrans({ch: ' ' for ch in special}))
output = output.lower()
# split() collapses any run of whitespace, so no extra space cleanup is needed.
word_list = output.split()
# Stemming of every single word.
stemmed_word_list = [stemmer.stem(w) for w in word_list]
# Removing additional superfluous words: a single filtering pass (order
# preserved) replaces the original O(n^2) while/remove loop — same result.
remove = {'the', 'of', 'you', 'as', 'e'}
stemmed_word_list = [w for w in stemmed_word_list if w not in remove]
stemmed_word_list[:10]
# Counting the frequency of the words (stemmed).
# The top-level pd.value_counts is deprecated; use the Series method instead.
word_count = pd.Series(np.array(stemmed_word_list)).value_counts()
# Newer pandas names the result 'count'; clear it so the DataFrame column label
# stays 0, which the word-cloud cell below relies on.
word_count.name = None
word_count_df = pd.DataFrame(word_count)
word_count_df.head(5)
# Creating a round word cloud of the stemmed words.
x, y = np.ogrid[:1000, :1000]
# 255 (white) outside a radius-400 circle masks those pixels out of the cloud.
mask = (x - 500) ** 2 + (y - 500) ** 2 > 400 ** 2
mask = 255 * mask.astype(int)
# Take the first (only) column positionally instead of assuming its label is 0,
# which breaks if the frequency column is ever renamed upstream.
data = word_count_df.iloc[:, 0].to_dict()
wordcloud = WordCloud(background_color="white", width=800, height=400, max_words=100, mask=mask).generate_from_frequencies(data)
plt.figure(figsize=(14, 14))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show()